// msg_tool/scripts/bgi/parser.rs

1use crate::ext::io::*;
2use crate::types::*;
3use crate::utils::encoding::decode_to_string;
4use anyhow::Result;
5use std::collections::HashMap;
6use std::io::{Seek, SeekFrom};
7
/// Operand kinds that may appear in an instruction's operand template.
///
/// An instruction table entry pairs an opcode with a slice of these kinds;
/// the parsers consume one operand from the script per kind, in order.
// NOTE(review): `allow(unused)` is presumably here because not every variant
// is constructed by the tables in this file (no template uses `N`).
#[allow(unused)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Inst {
    /// short (consumed as a 16-bit integer)
    H,
    /// int (consumed as a 32-bit integer)
    I,
    /// code offset (32-bit); the parsers track the largest one seen
    C,
    /// message offset (32-bit); recorded as a message string reference
    M,
    /// name offset (32-bit); recorded as a name string reference
    N,
    /// string operand; the parsers in this file skip it as an inline
    /// C string read from the instruction stream
    Z,
}
23
24use Inst::*;
25
26const V0_INSTS: [(u16, &'static [Inst]); 160] = [
27    (0x0010, &[I, I, M]),
28    (0x0012, &[Z, Z]),
29    (0x0013, &[Z]),
30    (0x0014, &[Z]),
31    (0x0018, &[I, I, I, I, I]),
32    (0x0019, &[I, I, I, I]), // untested
33    (0x001A, &[I, I, I]),
34    (0x001B, &[Z, I, I, I]), // untested
35    (0x001F, &[I]),          // untested
36    (0x0022, &[I]),          // untested
37    (0x0024, &[I, I, I, I, I]),
38    (0x0025, &[I, I]),
39    (0x0028, &[Z, I]),
40    (0x0029, &[Z, Z, I]),
41    (0x002A, &[I]),
42    (0x002B, &[Z, I]),
43    (0x002C, &[Z, I, I, I, I, I, I, I, I]),
44    (0x002D, &[Z, I, I, I, I, I, I, I, I]),
45    (0x002E, &[I, I, I, I, I]),
46    (0x0030, &[Z, I]),    // untested
47    (0x0031, &[Z, I, I]), // untested
48    (0x0032, &[I]),       // untested
49    (0x0033, &[I]),       // untested
50    (0x0034, &[I, I]),
51    (0x0035, &[I]),
52    (0x0036, &[I]),
53    (0x0038, &[I, Z, I, I, I, I, I]),
54    (0x0039, &[I, I]),
55    (0x003A, &[I, Z, I, I, I, I, I, I, I, I]), // untested
56    (0x003B, &[I, I, I, I, I, I]),
57    (0x003C, &[I, I, I, I, I, I, I, I, I, I]),
58    (0x003D, &[I, I, I, I, I, I, I, I, I, I, I]),
59    (0x003F, &[I]),
60    (0x0040, &[I, I, Z, I, I]),
61    (0x0041, &[I, I, Z, I, I]),
62    (0x0042, &[I, I, Z, I]),
63    (0x0043, &[I, I, Z, I]),
64    (0x0044, &[I, I, Z, I]),
65    (0x0045, &[I, I, Z, I]),
66    (0x0046, &[I, Z, I]),
67    (0x0047, &[I, Z, I]),
68    (0x0048, &[I, I]),
69    (0x0049, &[I, I]),
70    (0x004A, &[I, Z, I]),
71    (0x004C, &[Z, I]), // untested
72    (0x004D, &[Z, I]), // untested
73    (0x004E, &[I]),    // untested
74    (0x004F, &[I]),    // untested
75    (0x0050, &[Z, I]),
76    (0x0051, &[Z, Z, I]),
77    (0x0052, &[I]),
78    (0x0053, &[Z, I]),
79    (0x0054, &[Z, I, I]),
80    (0x0060, &[I, I, I, I, I]),
81    (0x0061, &[I, I]), // untested
82    (0x0062, &[I, I, I, I, I, I]),
83    (0x0065, &[I]),
84    (0x0066, &[I, I]),
85    (0x0067, &[I]),
86    (0x0068, &[I]),
87    (0x0069, &[I]),
88    (0x006A, &[I]),
89    (0x006B, &[I]), // untested
90    (0x006C, &[I]), // untested
91    (0x006E, &[I, I, I]),
92    (0x006F, &[I]),
93    (0x0070, &[I, Z, I]),
94    (0x0071, &[I]),
95    (0x0072, &[I, I, I]),
96    (0x0073, &[I, I, I]),
97    (0x0074, &[I, Z, I]),
98    (0x0075, &[I]),
99    (0x0076, &[I, I, I]),
100    (0x0078, &[I, Z, I]), // untested
101    (0x0079, &[I]),       // untested
102    (0x007A, &[I, I, I]), // untested
103    (0x0080, &[I, Z, I, I]),
104    (0x0081, &[Z]),
105    (0x0082, &[I]),
106    (0x0083, &[I]),
107    (0x0084, &[I, Z, I]), // untested
108    (0x0085, &[Z]),
109    (0x0086, &[I]),
110    (0x0087, &[I]),
111    (0x0088, &[Z]),
112    (0x008C, &[I]),
113    (0x008D, &[I]), // untested
114    (0x008E, &[I]), // untested
115    (0x0090, &[I]), // untested
116    (0x0091, &[I]), // untested
117    (0x0092, &[I]),
118    (0x0093, &[I]),
119    (0x0094, &[I]),
120    (0x0098, &[I, I]),
121    (0x0099, &[I, I]),
122    (0x009A, &[I, I]), // untested
123    (0x009B, &[I, I]), // untested
124    (0x009C, &[I, I]), // untested
125    (0x009D, &[I, I]), // untested
126    (0x00A0, &[C]),
127    (0x00A1, &[I, C]),    // untested
128    (0x00A2, &[I, C]),    // untested
129    (0x00A3, &[I, I, C]), // untested
130    (0x00A4, &[I, I, C]),
131    (0x00A5, &[I, I, C]),
132    (0x00A6, &[I, I, C]),
133    (0x00A7, &[I, I, C]), // untested
134    (0x00A8, &[I, I, C]),
135    (0x00AC, &[C]), // untested
136    (0x00AE, &[I]),
137    (0x00C0, &[Z]),
138    (0x00C1, &[Z]),
139    (0x00C4, &[I]),
140    (0x00C8, &[Z]),
141    (0x00CA, &[I]), // untested
142    (0x00D4, &[I]), // untested
143    (0x00D8, &[I]),
144    (0x00D9, &[I]),
145    (0x00DA, &[I]),
146    (0x00DB, &[I]),
147    (0x00DC, &[I]),
148    (0x00F8, &[Z]),    // untested
149    (0x00F9, &[Z, I]), // untested
150    (0x00FE, &[H]),
151    (0x0110, &[Z, Z]),
152    (0x0111, &[I]),
153    (0x0120, &[I]),
154    (0x0121, &[I]),
155    (0x0128, &[Z, I, I]),
156    (0x012A, &[I, I]),
157    (0x0134, &[I, I]), // untested
158    (0x0135, &[I]),    // untested
159    (0x0136, &[I]),    // untested
160    (0x0138, &[I, Z, I, I, I, I, Z, I, I, I]),
161    (0x013B, &[I, I, I, I, I, I, I, I]),
162    (0x0140, &[I, I, Z, I, I, I, I]), // untested
163    (0x0141, &[I, I, Z, I, I, I, I]), // untested
164    (0x0142, &[I, I, Z, I, I, I]),    // untested
165    (0x0143, &[I, I, Z, I, I, I]),    // untested
166    (0x0144, &[I, I, Z, I, I, I]),    // untested
167    (0x0145, &[I, I, Z, I, I, I]),    // untested
168    (0x0146, &[I, Z, I, I, I]),       // untested
169    (0x0147, &[I, Z, I, I, I]),       // untested
170    (0x0148, &[I, I]),
171    (0x0149, &[I, I]),
172    (0x014B, &[Z, I, I, Z]),
173    (0x0150, &[Z, I, I]),
174    (0x0151, &[Z, I, I, I]), // untested
175    (0x0152, &[I, I]),
176    (0x0153, &[I, I, I]), // untested
177    (0x016E, &[I, I, I, I, I, I]),
178    (0x016F, &[I, I, I, I, I, I, I]), // untested
179    (0x0170, &[I, Z, Z, I, I]),
180    (0x01C0, &[Z, Z]),
181    (0x01C1, &[Z, Z]),          // untested
182    (0x0249, &[Z]),             // untested
183    (0x024C, &[Z, Z, I, I, I]), // untested
184    (0x024D, &[Z]),             // untested
185    (0x024E, &[Z, Z]),          // untested
186    (0x024F, &[Z]),             // untested
187];
188
189const V1_INSTS: [(u32, &'static [Inst]); 12] = [
190    (0x0000, &[I]),
191    (0x0001, &[C]),
192    (0x0002, &[I]),
193    //    (0x0003, &[M]),
194    (0x0008, &[I]),
195    (0x0009, &[I]),
196    (0x000A, &[I]),
197    (0x0017, &[I]),
198    (0x0019, &[I]),
199    (0x003F, &[I]),
200    (0x007B, &[I, I, I]),
201    (0x007E, &[I]),
202    (0x007F, &[I, I]),
203];
204
205lazy_static::lazy_static! {
206    pub static ref V0_INSTS_MAP: HashMap<u16, &'static [Inst]> = HashMap::from(V0_INSTS);
207    pub static ref V1_INSTS_MAP: HashMap<u32, &'static [Inst]> = HashMap::from(V1_INSTS);
208}
209
/// Classification of a string reference found while disassembling a script.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BGIStringType {
    /// Reference recorded as a name.
    Name,
    /// Reference recorded as a message (also used for choice entries).
    Message,
    /// Reference that is not user-facing text as far as the parser can tell:
    /// function names, empty strings, and stack entries nothing else claimed.
    Internal,
    /// For v1 instructions.
    /// Only old BGI scripts have this type. (Scripts that do not have a magic)
    Ruby,
}

/// A reference from the instruction stream to a string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BGIString {
    /// Position in the script buffer of the 32-bit address operand itself.
    pub offset: usize,
    /// The address value read from that operand.
    pub address: usize,
    /// How the parser classified this reference.
    pub typ: BGIStringType,
}

impl BGIString {
    /// Returns `true` when this reference is classified as [`BGIStringType::Internal`].
    pub fn is_internal(&self) -> bool {
        matches!(self.typ, BGIStringType::Internal)
    }
}
232
233pub struct V0Parser<'a> {
234    buf: MemReaderRef<'a>,
235    largest_code_address_pperand_encountered: usize,
236    pub strings: Vec<BGIString>,
237}
238
239impl<'a> V0Parser<'a> {
240    pub fn new(buf: MemReaderRef<'a>) -> Self {
241        V0Parser {
242            buf,
243            largest_code_address_pperand_encountered: 0,
244            strings: Vec::new(),
245        }
246    }
247
248    fn read_code_address(&mut self) -> Result<()> {
249        let address = self.buf.read_u32()?;
250        self.largest_code_address_pperand_encountered = std::cmp::max(
251            self.largest_code_address_pperand_encountered,
252            address as usize,
253        );
254        Ok(())
255    }
256
257    fn read_string_address(&mut self, typ: BGIStringType) -> Result<()> {
258        let offset = self.buf.pos;
259        let address = self.buf.read_u32()? as usize;
260        self.strings.push(BGIString {
261            offset,
262            address,
263            typ,
264        });
265        Ok(())
266    }
267
268    fn skip_inline_string(&mut self) -> Result<()> {
269        self.buf.read_cstring()?;
270        Ok(())
271    }
272
273    fn read_oper_00a9(&mut self) -> Result<()> {
274        let count = self.buf.read_u32()?;
275        for _ in 0..count {
276            self.read_code_address()?;
277        }
278        Ok(())
279    }
280
281    fn read_oper_00b0(&mut self) -> Result<()> {
282        let count = self.buf.read_u32()?;
283        for _ in 0..count {
284            self.skip_inline_string()?;
285        }
286        Ok(())
287    }
288
289    fn read_oper_00b4(&mut self) -> Result<()> {
290        // untested
291        let count = self.buf.read_u32()?;
292        for _ in 0..count {
293            self.skip_inline_string()?;
294        }
295        Ok(())
296    }
297
298    fn read_oper_00fd(&mut self) -> Result<()> {
299        // untested
300        let count = self.buf.read_u32()?;
301        for _ in 0..count {
302            self.skip_inline_string()?;
303            self.read_code_address()?;
304        }
305        Ok(())
306    }
307
308    fn read_opers(&mut self, templ: &'static [Inst]) -> Result<()> {
309        for t in templ.iter() {
310            match t {
311                H => {
312                    self.buf.read_i16()?;
313                }
314                I => {
315                    self.buf.read_i32()?;
316                }
317                C => {
318                    self.read_code_address()?;
319                }
320                M => {
321                    self.read_string_address(BGIStringType::Message)?;
322                }
323                Z => {
324                    self.skip_inline_string()?;
325                }
326                N => {
327                    self.read_string_address(BGIStringType::Name)?;
328                }
329            }
330        }
331        Ok(())
332    }
333
334    pub fn disassemble(&mut self) -> Result<()> {
335        loop {
336            let opcode = self.buf.read_u16()?;
337            if opcode == 0x00a9 {
338                self.read_oper_00a9()?;
339            } else if opcode == 0x00b0 {
340                self.read_oper_00b0()?;
341            } else if opcode == 0x00b4 {
342                self.read_oper_00b4()?;
343            } else if opcode == 0x00fd {
344                self.read_oper_00fd()?;
345            } else if let Some(templ) = V0_INSTS_MAP.get(&opcode) {
346                self.read_opers(templ)?;
347            }
348            if opcode == 0x00c2 && self.largest_code_address_pperand_encountered < self.buf.pos {
349                break;
350            }
351        }
352        Ok(())
353    }
354}
355
/// A string-address operand that has been read but not yet attributed to the
/// instruction that uses it.
#[derive(Debug, Clone, Copy)]
struct StackItem {
    /// Position in the script buffer of the 32-bit address operand itself.
    pub offset: usize,
    /// The address value read from that operand.
    pub value: usize,
}
360
361pub struct V1Parser<'a> {
362    buf: MemReaderRef<'a>,
363    largest_code_address_pperand_encountered: usize,
364    stacks: Vec<StackItem>,
365    encoding: Encoding,
366    pub offset: usize,
367    pub strings: Vec<BGIString>,
368}
369
370impl<'a> V1Parser<'a> {
371    pub fn new(mut buf: MemReaderRef<'a>, encoding: Encoding) -> Result<Self> {
372        if buf.data.len() < 32 {
373            return Err(anyhow::anyhow!("Buffer too small"));
374        }
375        let offset = if buf.data.starts_with(b"BurikoCompiledScriptVer1.00\0") {
376            28 + buf.peek_u32_at(28)? as u64
377        } else {
378            0
379        };
380        buf.seek(SeekFrom::Start(offset))?;
381        Ok(V1Parser {
382            buf,
383            largest_code_address_pperand_encountered: 0,
384            stacks: Vec::new(),
385            encoding,
386            offset: offset as usize,
387            strings: Vec::new(),
388        })
389    }
390
391    fn read_code_address(&mut self) -> Result<()> {
392        let address = self.buf.read_u32()?;
393        self.largest_code_address_pperand_encountered = std::cmp::max(
394            self.largest_code_address_pperand_encountered,
395            address as usize,
396        );
397        Ok(())
398    }
399
400    fn read_string_address(&mut self, typ: BGIStringType) -> Result<()> {
401        let offset = self.buf.pos;
402        let address = self.buf.read_u32()? as usize;
403        self.strings.push(BGIString {
404            offset,
405            address,
406            typ,
407        });
408        Ok(())
409    }
410
411    fn skip_inline_string(&mut self) -> Result<()> {
412        self.buf.read_cstring()?;
413        Ok(())
414    }
415
416    fn read_opers(&mut self, templ: &'static [Inst]) -> Result<()> {
417        for t in templ.iter() {
418            match t {
419                H => {
420                    self.buf.read_i16()?;
421                }
422                I => {
423                    self.buf.read_i32()?;
424                }
425                C => {
426                    self.read_code_address()?;
427                }
428                M => {
429                    self.read_string_address(BGIStringType::Message)?;
430                }
431                Z => {
432                    self.skip_inline_string()?;
433                }
434                N => {
435                    self.read_string_address(BGIStringType::Name)?;
436                }
437            }
438        }
439        Ok(())
440    }
441
442    fn read_push_string_address_operand(&mut self) -> Result<()> {
443        let offset = self.buf.pos;
444        let address = self.buf.read_u32()? as usize;
445        self.stacks.push(StackItem {
446            offset,
447            value: address,
448        });
449        Ok(())
450    }
451
452    pub fn is_empty_string(&self, address: usize) -> Result<bool> {
453        let start = self.offset + address;
454        let data = self.buf.cpeek_u8_at(start as u64)?;
455        Ok(data == 0)
456    }
457
458    pub fn read_string_at_address(&mut self, address: usize) -> Result<String> {
459        let start = self.offset + address;
460        let buf = self.buf.peek_cstring_at(start as u64)?;
461        // Sometimes string has private use area characters, so we disable strict checking
462        Ok(decode_to_string(self.encoding, buf.as_bytes(), false)?)
463    }
464
465    pub fn handle_user_function_call(&mut self) -> Result<()> {
466        let item = match self.stacks.pop() {
467            Some(item) => item,
468            None => return Ok(()),
469        };
470        self.strings.push(BGIString {
471            offset: item.offset,
472            address: item.value,
473            typ: BGIStringType::Internal,
474        });
475        let funcname = self.read_string_at_address(item.value)?;
476        if funcname == "_SelectEx" || funcname == "_SelectExtend" {
477            self.handle_choice_screen()?;
478        }
479        Ok(())
480    }
481
482    pub fn handle_ruby(&mut self) -> Result<()> {
483        let dest = self
484            .stacks
485            .pop()
486            .ok_or(anyhow::anyhow!("Stack underflow"))?;
487        let ori = self
488            .stacks
489            .pop()
490            .ok_or(anyhow::anyhow!("Stack underflow"))?;
491        self.strings.push(BGIString {
492            offset: ori.offset,
493            address: ori.value,
494            typ: BGIStringType::Ruby,
495        });
496        self.strings.push(BGIString {
497            offset: dest.offset,
498            address: dest.value,
499            typ: BGIStringType::Ruby,
500        });
501        Ok(())
502    }
503
504    pub fn handle_message_old(&mut self) -> Result<()> {
505        let item = self
506            .stacks
507            .pop()
508            .ok_or(anyhow::anyhow!("Stack underflow"))?;
509        match self.stacks.pop() {
510            Some(stack) => {
511                self.strings.push(BGIString {
512                    offset: item.offset,
513                    address: item.value,
514                    typ: if self.is_empty_string(item.value)? {
515                        BGIStringType::Internal
516                    } else {
517                        BGIStringType::Name
518                    },
519                });
520                self.strings.push(BGIString {
521                    offset: stack.offset,
522                    address: stack.value,
523                    typ: if self.is_empty_string(stack.value)? {
524                        BGIStringType::Internal
525                    } else {
526                        BGIStringType::Message
527                    },
528                });
529                return Ok(());
530            }
531            None => {}
532        }
533        self.strings.push(BGIString {
534            offset: item.offset,
535            address: item.value,
536            typ: if self.is_empty_string(item.value)? {
537                BGIStringType::Internal
538            } else {
539                BGIStringType::Message
540            },
541        });
542        Ok(())
543    }
544
545    pub fn handle_message(&mut self) -> Result<()> {
546        let item = self
547            .stacks
548            .pop()
549            .ok_or(anyhow::anyhow!("Stack underflow"))?;
550        match self.stacks.pop() {
551            Some(stack) => {
552                self.strings.push(BGIString {
553                    offset: stack.offset,
554                    address: stack.value,
555                    typ: if self.is_empty_string(stack.value)? {
556                        BGIStringType::Internal
557                    } else {
558                        BGIStringType::Name
559                    },
560                });
561            }
562            None => {}
563        }
564        self.strings.push(BGIString {
565            offset: item.offset,
566            address: item.value,
567            typ: if self.is_empty_string(item.value)? {
568                BGIStringType::Internal
569            } else {
570                BGIStringType::Message
571            },
572        });
573        Ok(())
574    }
575
576    pub fn handle_choice_screen(&mut self) -> Result<()> {
577        let mut choices = Vec::new();
578        loop {
579            match self.stacks.pop() {
580                Some(stack) => {
581                    choices.insert(0, stack);
582                }
583                None => break,
584            }
585        }
586        for choice in choices {
587            self.strings.push(BGIString {
588                offset: choice.offset,
589                address: choice.value,
590                typ: BGIStringType::Message,
591            });
592        }
593        Ok(())
594    }
595
596    pub fn disassemble(&mut self) -> Result<()> {
597        loop {
598            let opcode = self.buf.read_u32()?;
599            if opcode == 0x0003 {
600                self.read_push_string_address_operand()?;
601            } else if opcode == 0x001c {
602                self.handle_user_function_call()?;
603            } else if opcode == 0x0140 || opcode == 0x0143 {
604                self.handle_message()?;
605            } else if opcode == 0x0160 {
606                self.handle_choice_screen()?;
607            } else if let Some(templ) = V1_INSTS_MAP.get(&opcode) {
608                self.read_opers(templ)?;
609            } else if opcode == 0x0145 {
610                self.handle_message_old()?;
611            } else if opcode == 0x014e {
612                self.handle_ruby()?;
613            }
614            if (opcode == 0x001b || opcode == 0x00f4)
615                && self.largest_code_address_pperand_encountered < self.buf.pos - self.offset
616            {
617                break;
618            }
619            if opcode == 0x007e || opcode == 0x007f || opcode == 0x00fe || opcode == 0x01b5 {
620                self.output_internal_strings();
621            }
622        }
623        self.output_internal_strings();
624        Ok(())
625    }
626
627    pub fn output_internal_strings(&mut self) {
628        loop {
629            match self.stacks.pop() {
630                Some(stack) => {
631                    self.strings.push(BGIString {
632                        offset: stack.offset,
633                        address: stack.value,
634                        typ: BGIStringType::Internal,
635                    });
636                }
637                None => break,
638            }
639        }
640    }
641}